/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable @typescript-eslint/no-empty-function */

import { TruncationStrategy, BPE, Encoding, AddedToken, Tokenizer } from '../../'

// jest.mock('../../bindings/tokenizer');
// jest.mock('../../bindings/models', () => ({
//   __esModule: true,
//   Model: jest.fn()
// }));

// Or:
// jest.mock('../../bindings/models', () => {
//   return require('../../bindings/__mocks__/models');
// });

// const TokenizerMock = mocked(Tokenizer);

describe('AddedToken', () => {
  it('instantiates with only content', () => {
    const addToken = new AddedToken('test', false)
    expect(addToken.constructor.name).toEqual('AddedToken')
  })

  it('instantiates with empty options', () => {
    const addToken = new AddedToken('test', false, {})
    expect(addToken.constructor.name).toEqual('AddedToken')
  })

  it('instantiates with options', () => {
    const addToken = new AddedToken('test', false, {
      leftStrip: true,
      rightStrip: true,
      singleWord: true,
    })
    expect(addToken.constructor.name).toEqual('AddedToken')
  })

  describe('getContent', () => {
    it('returns the string content of AddedToken', () => {
      const addedToken = new AddedToken('test', false)
      expect(addedToken.getContent()).toEqual('test')
    })
  })
})

describe('Tokenizer', () => {
  it('has expected methods', () => {
    const model = BPE.empty()
    const tokenizer = new Tokenizer(model)

    expect(typeof Tokenizer.fromFile).toBe('function')
    expect(typeof Tokenizer.fromString).toBe('function')
    // expect(typeof Tokenizer.fromPretrained).toBe('function')

    expect(typeof tokenizer.addSpecialTokens).toBe('function')
    expect(typeof tokenizer.addTokens).toBe('function')
    expect(typeof tokenizer.decode).toBe('function')
    expect(typeof tokenizer.decodeBatch).toBe('function')
    expect(typeof tokenizer.disablePadding).toBe('function')
    expect(typeof tokenizer.disableTruncation).toBe('function')
    expect(typeof tokenizer.encode).toBe('function')
    expect(typeof tokenizer.encodeBatch).toBe('function')
    expect(typeof tokenizer.getDecoder).toBe('function')
    expect(typeof tokenizer.getNormalizer).toBe('function')
    expect(typeof tokenizer.getPostProcessor).toBe('function')
    expect(typeof tokenizer.getPreTokenizer).toBe('function')
    expect(typeof tokenizer.getVocab).toBe('function')
    expect(typeof tokenizer.getVocabSize).toBe('function')
    expect(typeof tokenizer.idToToken).toBe('function')
    expect(typeof tokenizer.runningTasks).toBe('function')
    expect(typeof tokenizer.save).toBe('function')
    expect(typeof tokenizer.setDecoder).toBe('function')
    expect(typeof tokenizer.setModel).toBe('function')
    expect(typeof tokenizer.setNormalizer).toBe('function')
    expect(typeof tokenizer.setPadding).toBe('function')
    expect(typeof tokenizer.setPostProcessor).toBe('function')
    expect(typeof tokenizer.setPreTokenizer).toBe('function')
    expect(typeof tokenizer.setTruncation).toBe('function')
    expect(typeof tokenizer.tokenToId).toBe('function')
    expect(typeof tokenizer.toString).toBe('function')
    expect(typeof tokenizer.train).toBe('function')
  })

  // it('can be instantiated from the hub', async () => {
  //   let tokenizer: Tokenizer
  //   let output: Encoding

  //   tokenizer = Tokenizer.fromPretrained('bert-base-cased')
  //   output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
  //   expect(output.getTokens()).toEqual(['Hey', 'there', 'dear', 'friend', '!'])

  //   tokenizer = Tokenizer.fromPretrained('anthony/tokenizers-test')
  //   output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
  //   expect(output.getTokens()).toEqual(['hey', 'there', 'dear', 'friend', '!'])

  //   tokenizer = Tokenizer.fromPretrained('anthony/tokenizers-test', {
  //     revision: 'gpt-2',
  //   })
  //   output = await tokenizer.encode('Hey there dear friend!', null, { addSpecialTokens: false })
  //   expect(output.getTokens()).toEqual(['Hey', 'Ġthere', 'Ġdear', 'Ġfriend', '!'])
  // }, 10000)

  describe('addTokens', () => {
    it('accepts a list of string as new tokens when initial model is empty', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)

      const nbAdd = tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
      expect(nbAdd).toBe(5)
    })

    it('accepts a list of AddedToken as new tokens when initial model is empty', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)
      const addedToken = new AddedToken('test', false)

      const nbAdd = tokenizer.addAddedTokens([addedToken])
      expect(nbAdd).toBe(1)
    })
  })

  describe('encode', () => {
    let tokenizer: Tokenizer
    beforeEach(() => {
      // Clear all instances and calls to constructor and all methods:
      // TokenizerMock.mockClear();

      const model = BPE.empty()
      tokenizer = new Tokenizer(model)
      tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
    })

    it('accepts a pair of strings as parameters', async () => {
      const encoding = await tokenizer.encode('my name is john', 'pair')
      expect(encoding).toBeDefined()
    })

    it('accepts a string with a null pair', async () => {
      const encoding = await tokenizer.encode('my name is john', null)
      expect(encoding).toBeDefined()
    })

    // TODO
    // it("throws if we try to encode a pre-tokenized string without isPretokenized=true", async () => {
    //   await expect((encode as any)(["my", "name", "is", "john"], null)).rejects.toThrow(
    //     "encode with isPreTokenized=false expect string"
    //   );
    // });

    // it("accepts a pre-tokenized string as parameter", async () => {
    //   const encoding = await tokenizer.encode(["my", "name", "is", "john"], undefined, {
    //     isPretokenized: true,
    //   });
    //   expect(encoding).toBeDefined();
    // });

    // it("throws if we try to encodeBatch pre-tokenized strings without isPretokenized=true", async () => {
    //   await expect((encodeBatch as any)([["my", "name", "is", "john"]])).rejects.toThrow(
    //     "encodeBatch with isPretokenized=false expects input to be `EncodeInput[]` " +
    //       "with `EncodeInput = string | [string, string]`"
    //   );
    // });

    // it("accepts a pre-tokenized input in encodeBatch", async () => {
    //   const encoding = await tokenizer.encodeBatch([["my", "name", "is", "john"]], {
    //     isPretokenized: true,
    //   });
    //   expect(encoding).toBeDefined();
    // });

    it('Encodes correctly if called with only one argument', async () => {
      const encoded = await tokenizer.encode('my name is john')
      expect(encoded.getIds()).toEqual([0, 1, 2, 3])
    })

    it('returns an Encoding', async () => {
      const encoding = await tokenizer.encode('my name is john', 'pair')

      expect(encoding.getAttentionMask()).toEqual([1, 1, 1, 1, 1])

      const ids = encoding.getIds()
      expect(Array.isArray(ids)).toBe(true)
      expect(ids).toHaveLength(5)
      for (const id of ids) {
        expect(typeof id).toBe('number')
      }

      expect(encoding.getOffsets()).toEqual([
        [0, 2],
        [3, 7],
        [8, 10],
        [11, 15],
        [0, 4],
      ])
      expect(encoding.getOverflowing()).toEqual([])
      expect(encoding.getSpecialTokensMask()).toEqual([0, 0, 0, 0, 0])
      expect(encoding.getTokens()).toEqual(['my', 'name', 'is', 'john', 'pair'])
      expect(encoding.getTypeIds()).toEqual([0, 0, 0, 0, 1])
    })

    describe('when truncation is enabled', () => {
      it('truncates with default if no truncation options provided', async () => {
        tokenizer.setTruncation(2)

        const singleEncoding = await tokenizer.encode('my name is john', null)
        expect(singleEncoding.getTokens()).toEqual(['my', 'name'])

        const pairEncoding = await tokenizer.encode('my name is john', 'pair')
        expect(pairEncoding.getTokens()).toEqual(['my', 'pair'])
      })

      it('throws an error with strategy `only_second` and no pair is encoded', async () => {
        tokenizer.setTruncation(2, { strategy: TruncationStrategy.OnlySecond })
        await expect(tokenizer.encode('my name is john', null)).rejects.toThrow(
          'Truncation error: Second sequence not provided',
        )
      })
    })

    describe('when padding is enabled', () => {
      it('does not pad anything with default options', async () => {
        tokenizer.setPadding()

        const singleEncoding = await tokenizer.encode('my name', null)
        expect(singleEncoding.getTokens()).toEqual(['my', 'name'])

        const pairEncoding = await tokenizer.encode('my name', 'pair')
        expect(pairEncoding.getTokens()).toEqual(['my', 'name', 'pair'])
      })

      it('pads to the right by default', async () => {
        tokenizer.setPadding({ maxLength: 5 })

        const singleEncoding = await tokenizer.encode('my name', null)
        expect(singleEncoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])

        const pairEncoding = await tokenizer.encode('my name', 'pair')
        expect(pairEncoding.getTokens()).toEqual(['my', 'name', 'pair', '[PAD]', '[PAD]'])
      })

      it('pads to multiple of the given value', async () => {
        tokenizer.setPadding({ padToMultipleOf: 8 })

        const singleEncoding = await tokenizer.encode('my name', null)
        expect(singleEncoding.getTokens()).toHaveLength(8)

        const pairEncoding = await tokenizer.encode('my name', 'pair')
        expect(pairEncoding.getTokens()).toHaveLength(8)
      })
    })
  })

  describe('decode', () => {
    let tokenizer: Tokenizer

    beforeEach(() => {
      const model = BPE.empty()
      tokenizer = new Tokenizer(model)
      tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
    })

    it('has its callback called with the decoded string', async () => {
      const decode = tokenizer.decode.bind(tokenizer)
      expect(await decode([0, 1, 2, 3], true)).toEqual('my name is john')
    })
  })

  describe('decodeBatch', () => {
    let tokenizer: Tokenizer

    beforeEach(() => {
      const model = BPE.empty()
      tokenizer = new Tokenizer(model)
      tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
    })

    it('has its callback called with the decoded string', async () => {
      const decodeBatch = tokenizer.decodeBatch.bind(tokenizer)
      expect(await decodeBatch([[0, 1, 2, 3], [4]], true)).toEqual(['my name is john', 'pair'])
    })
  })

  describe('getVocab', () => {
    it('accepts `undefined` as parameter', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)

      expect(tokenizer.getVocab(undefined)).toBeDefined()
    })

    it('returns the vocabulary', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)
      tokenizer.addTokens(['my', 'name', 'is', 'john'])

      expect(tokenizer.getVocab(true)).toEqual({
        my: 0,
        name: 1,
        is: 2,
        john: 3,
      })
    })
  })

  describe('getVocabSize', () => {
    it('accepts `undefined` as parameter', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)

      expect(tokenizer.getVocabSize(undefined)).toBeDefined()
    })
  })

  describe('setTruncation', () => {
    it('returns the full truncation configuration', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)

      tokenizer.setTruncation(2)
      // TODO Return type is weird
      // const expectedConfig: TruncationOptions = {
      //   maxLength: 2,
      //   strategy: TruncationStrategy.LongestFirst,
      //   stride: 0,
      //   direction: TruncationDirection.Right,
      // };
      // expect(truncation).toEqual(expectedConfig);
    })
  })

  describe('setPadding', () => {
    it('returns the full padding params', () => {
      const model = BPE.empty()
      const tokenizer = new Tokenizer(model)

      tokenizer.setPadding()
      // TODO Return type is weird
      // const expectedConfig: PaddingOptions = {
      //   direction: PaddingDirection.Right,
      //   padId: 0,
      //   padToken: "[PAD]",
      //   padTypeId: 0,
      // };
      // expect(padding).toEqual(expectedConfig);
    })
  })

  describe('postProcess', () => {
    let tokenizer: Tokenizer
    let firstEncoding: Encoding
    let secondEncoding: Encoding

    beforeAll(() => {
      const model = BPE.empty()
      tokenizer = new Tokenizer(model)
      tokenizer.addTokens(['my', 'name', 'is', 'john', 'pair'])
    })

    beforeEach(async () => {
      firstEncoding = await tokenizer.encode('my name is john', null)
      secondEncoding = await tokenizer.encode('pair', null)

      tokenizer.setTruncation(2)
      tokenizer.setPadding({ maxLength: 5 })
    })

    it('returns correctly with a single Encoding param', () => {
      const encoding = tokenizer.postProcess(firstEncoding)
      expect(encoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])
    })

    it('returns correctly with `undefined` as second and third parameters', () => {
      const encoding = tokenizer.postProcess(firstEncoding, undefined, undefined)
      expect(encoding.getTokens()).toEqual(['my', 'name', '[PAD]', '[PAD]', '[PAD]'])
    })

    it('returns correctly with 2 encodings', () => {
      const encoding = tokenizer.postProcess(firstEncoding, secondEncoding)
      expect(encoding.getTokens()).toEqual(['my', 'pair', '[PAD]', '[PAD]', '[PAD]'])
    })
  })
})
